# -*- coding: utf-8 -*-
import scrapy
from news.items import BaiduItem


class BaiduSpider(scrapy.Spider):
    """Spider that turns the rows of the Baidu "buzz" ranking table into items.

    Each ``<tr>`` found under ``table.list-table`` on the start page is
    converted into one ``BaiduItem`` and handed to the pipelines configured
    in ``custom_settings``.
    """

    name = 'baidu'
    allowed_domains = ['www.baidu.com', 'top.baidu.com']
    start_urls = ['http://top.baidu.com/buzz?b=1']

    # Per-spider pipeline chain; Scrapy runs lower numbers first, so the
    # empty-check pipeline sees every item before the save pipeline does.
    custom_settings = {
        'ITEM_PIPELINES': {
            'news.pipelines.BaiduCheckEmptyPipeline': 10,
            'news.pipelines.BaiduSavePipeline': 300,
        }
    }

    def parse(self, response):
        """Yield one ``BaiduItem`` per table row of the ranking page.

        Args:
            response: The downloaded ranking page.

        Yields:
            A ``BaiduItem`` with ``title``, ``link``, ``sort``, ``index``,
            ``is_new``, ``is_rise`` and ``update_time`` populated. ``title``,
            ``link`` and ``index`` are ``None`` when the row has no matching
            node; ``is_new`` and ``is_rise`` are match counts (ints).
        """
        rows = response.xpath('//table[@class="list-table"]//tr')

        # ``position`` counts every <tr>, including rows that carry no
        # keyword cell, so it is the row's offset within the table.
        # NOTE(review): rows without a title are presumably dropped by
        # BaiduCheckEmptyPipeline -- confirm.
        for position, row in enumerate(rows):
            title = row.xpath('td[@class="keyword"]/a[@class="list-title"]/text()').get()
            link = row.xpath('td[@class="keyword"]/a[@class="list-title"]/@href').get()
            index = row.xpath('td[@class="last"]/span//text()').get()

            # Marker icons: stored as the number of matching <span> nodes.
            new_markers = row.xpath('td[@class="keyword"]/span[contains(@class, "icon-new")]')
            rise_markers = row.xpath('td[@class="last"]/span[@class="icon-rise"]')

            entry = BaiduItem()
            entry["title"] = title
            entry["link"] = link
            entry["sort"] = position
            entry["index"] = index
            entry["is_new"] = len(new_markers)
            entry["is_rise"] = len(rise_markers)
            # NOTE(review): ``crawler.now`` is not a stock Scrapy attribute;
            # presumably set by a project extension -- confirm.
            entry["update_time"] = self.crawler.now

            yield entry
